# setwd("~/Dropbox/T&G project survey/Ukraine/replication_JOP")
# libraries
library(tidyverse)
library(cobalt)
library(patchwork)
library(paletteer)
library(MatchIt)
library(estimatr)
library(lmtest)
library(stargazer)
source("func.R")

# ggplot defaults: geoms theme
update_geom_defaults("text", list(family = "Archivo Narrow"))
update_geom_defaults("label", list(family = "Archivo Narrow"))
theme_set(theme_nice())

## Load data
df = read_rds("data/clean_data.rds")

## Split sample across bandwidths

# Select as breaks days in March with at least 200 responses
date_breaks = names(table(df$date[df$post==1])[table(df$date[df$post==1]) > 200])
# Last date should be last day
date_breaks = as.Date(c(date_breaks, as.character(max(df$date, na.rm = TRUE))))
# Split
df_list = vector("list", length(date_breaks))
for(d in seq_along(date_breaks)){df_list[[d]] = df %>% filter(date <= date_breaks[d])}

## Created matched datasets

# Matching variables and formula
mvars = c("edad", "Q14", "ideo", "sexo", "clase_social_r", "educacion_r")
f = as.formula(paste0("post ~ ", paste(mvars, collapse = "+")))

# List of matched datasets
matched_dfs = vector("list", length(date_breaks))
for(i in seq_along(date_breaks)){
  m = matchit(f, data = df_list[[i]], method = "full", estimand = "ATC")
  matched_dfs[[i]] = match.data(m)
}


## Main result by bandwidth

# List of models
model_list = vector("list", length(date_breaks)*2)
for(m in seq(2, length(date_breaks)*2, 2)){
  df_i = m/2
  model_list[[m-1]] = lm_robust(Q60 ~ post, data = matched_dfs[[df_i]],
    weights = weights, clusters = subclass, se_type = "stata")
  model_list[[m]] = lm_robust(Q61 ~ post, data = matched_dfs[[df_i]],
    weights = weights, clusters = subclass, se_type = "stata")
}

# Check
# for(i in 1:20){print(summary(model_list[[i]])$coefficients[,1])}

# Get coefficients
m_q_coefs_l = vector("list", length(model_list))
for(j in seq_along(model_list)){
  m_q_coefs_l[[j]] = tidy(model_list[[j]], conf.int = TRUE) %>% filter(term == "post")
}
# Group as list and transform
m_q_coefs = do.call(bind_rows, m_q_coefs_l) %>%
  mutate(outcome2 = recode(outcome,
    "Q60" = "National identification", "Q61" = "Regional identification")) %>%
  mutate(max_date = rep(date_breaks, each = 2)) %>%
  mutate(max_date = as.character(format(max_date, "%B %d"))) %>%
  mutate(
    conf.high90 = estimate + std.error * qnorm(0.95),
    conf.low90 = estimate - std.error * qnorm(0.95))

main = ggplot(m_q_coefs, aes(x = estimate, y = max_date)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
    fatten = 1, size = 2, shape = 21, fill = "white") +
  geom_pointrange(aes(xmin = conf.low90, xmax = conf.high90),
    fatten = 1, size = 3, shape = 21, fill = "white") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  labs(x = "Coefficient estimate of invasion", y = "Including 'treated' responses until:\n",
    subtitle = "Bars indicate 90% and 95% CIs.") +
  theme(axis.text.y = element_text(size = 12)) +
  facet_wrap(~outcome2)
ggsave("figures/main_models_bandwidth.pdf", width = 6, height = 5, device = cairo_pdf)
